# === load relevant libraries
library(data.table)
library(ggplot2)
library(DescTools)
library(plm)
library(lfe)
library(dplyr)

# === Figure 5(a). rating change --> fund flow
rm(list = ls())

# read fund flow data
data = readRDS('input_data/monthly_fund_data.RDS')
data = data[, list(yyyymm, fundno, dr = rating - rating_1, category, flow, aum_1, ret)] # dr = rating change

# compute fund style adjusted returns
data = merge(data, data[, list(styleRet = weighted.mean(ret, aum_1)), list(yyyymm, category)], by = c('yyyymm','category'))
data[, adjret := ret - styleRet]
data[, styleRet := NULL]

# Get 36 lags of past returns, flows, and rating changes
yyyymms = sort(unique(data[, yyyymm]))
n = length(yyyymms)

data[, ret_past := 0]    # log of past 36m of fund returns
data[, adjret_past := 0] # log of past 36m of style-adjusted fund returns
data[, ret := log(1+ret)]       # turn into log returns
data[, adjret := log(1+adjret)]
for (i in 1:36){
  print(i)
  tmp = data.table(yyyymm = yyyymms[1:(n-i)], yyyymm_next = yyyymms[(i+1):n])
  data = merge(data, tmp, by = 'yyyymm', all.x = T)
  data = merge(data, data[, list(yyyymm = yyyymm_next, fundno, rr = ret, arr = adjret, ff = flow, xx = dr)], by = c('yyyymm','fundno'), all.x = T)
  data[, ret_past := ret_past + rr]
  data[, adjret_past := adjret_past + arr]
  data[, c('yyyymm_next','rr','arr') := NULL]
  setnames(data, c('ff','xx'), paste0(c('flow_','dr_'), i))
}
data = data[yyyymm >= 199401]
rm(i, yyyymms, n, tmp)

# turn past fund return and style-adjusted fund returns into deciles
for (this in unique(data[, yyyymm])){
  data[yyyymm == this, bin_ret_past := ntile(ret_past, 10)]
  data[yyyymm == this, bin_adjret_past := ntile(adjret_past, 10)]
}
rm(this)

# estimate fama-macbeth regressions of fund flow response to rating changes
ff = as.formula(flow ~ dr_1+dr_2+dr_3+dr_4+dr_5+dr_6+dr_7+dr_8+dr_9+dr_10+dr_11+dr_12+dr_13+dr_14+dr_15+dr_16+dr_17+dr_18+dr_19+dr_20+dr_21+dr_22+dr_23+dr_24+dr_25+dr_26+dr_27+dr_28+dr_29+dr_30+dr_31+dr_32+dr_33+dr_34+dr_35+dr_36
                + as.factor(bin_ret_past) + as.factor(bin_adjret_past)
                +flow_1+flow_2+flow_3+flow_4+flow_5+flow_6+flow_7+flow_8+flow_9+flow_10+flow_11+flow_12+flow_13+flow_14+flow_15+flow_16+flow_17+flow_18+flow_19+flow_20+flow_21+flow_22+flow_23+flow_24+flow_25+flow_26+flow_27+flow_28+flow_29+flow_30+flow_31+flow_32+flow_33+flow_34+flow_35+flow_36)
fit = pmg(ff, data, index=c("yyyymm"), model = 'mg'); gc()

# compute cumulative impulse-response to past rating changes and standard errors
cc = as.matrix(fit$coef[2:37])
C = fit$vcov[2:37,2:37]

out = data.table(hor = 0, coef = 0, se = 0)
for (i in 1:36){
  beta = matrix(c(rep(1, i), rep(0, 36-i)))
  out = rbind(out, data.table(hor = i, coef = (t(beta) %*% cc)[1], 
                              se = sqrt(t(beta) %*% C %*% beta)[1]))
}

# plot results
ggplot(out, aes(x = hor, y = coef)) + geom_line(lwd = 1) + 
  geom_ribbon(aes(ymin = coef-1.96*se, ymax = coef+1.96*se), alpha = .2) + 
  theme_classic() + labs(x = 'Months', y = 'Cumulative fund flow response') + 
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + coord_cartesian(ylim = c(0, .08))



# === Figure 5(b). Flow-induced trading --> stock return
rm(list = ls())

# monthly stock returns
data = readRDS('input_data/monthly_stock_return.RDS')

# get 36 months of FIT (flow-induced trading) lags
fData = readRDS('input_data/flow_induced_trading.RDS')
yyyymms = sort(unique(fData[, yyyymm]))
n = length(yyyymms)
for (i in 1:36){
  print(i)
  tmp = data.table(yyyymm = yyyymms[1:(n-i)], yyyymm_next = yyyymms[(i+1):n])
  fData = merge(fData, tmp, by = 'yyyymm', all.x = T)
  fData = merge(fData, fData[, list(yyyymm = yyyymm_next, permno, xx = fit)], by = c('yyyymm','permno'), all.x = T)
  setnames(fData, 'xx', paste0('fit_', i))
  fData[, yyyymm_next := NULL]
}
rm(i, n, yyyymms, tmp); gc()
data = merge(data, fData, by = c('yyyymm','permno')); rm(fData)
data = data[0 == rowSums(is.na(data))]; gc()

# estimate cap-weighted Fama-MacBeth regressions
ff = as.formula(ret ~ fit + fit_1+fit_2+fit_3+fit_4+fit_5+fit_6+fit_7+fit_8+fit_9+fit_10+fit_11+fit_12+fit_13+fit_14+fit_15+fit_16+fit_17+fit_18+fit_19+fit_20+fit_21+fit_22+fit_23+fit_24+fit_25+fit_26+fit_27+fit_28+fit_29+fit_30+fit_31+fit_32+fit_33+fit_34+fit_35+fit_36)
yyyymms = sort(unique(data[, yyyymm]))
n = length(yyyymms)
p.estimate_one_month = function(thisM){
  tmp = copy(data[yyyymm == thisM])
  fit = felm(ff, tmp, weights = tmp[, me_1])
  return(data.table(yyyymm = thisM, lag = 0:36, cumcoef = cumsum(fit$coef[2:38])))
}

out = Reduce(rbind, lapply(yyyymms, p.estimate_one_month)); gc()
out = out[, list(coef = mean(cumcoef), se = sd(cumcoef)/sqrt(n)), lag]
out = rbind(data.table(lag = -1, coef = 0, se = 0), out)

# plot results
ggplot(out, aes(x = lag, y = coef)) + geom_line(lwd = 1) + 
  geom_ribbon(aes(ymin = coef-1.96*se, ymax = coef+1.96*se), alpha = .2) + 
  theme_classic() + labs(x = 'Months', y = 'Cumulative price response') + 
  coord_cartesian(ylim = c(-.2, .8)) + geom_hline(yintercept = 0, lty = 3)

# === Figure 5(c): rating change --> stock return
rm(list = ls())

out = readRDS('input_data/return_response_to_rating_changes.RDS')
A = 0.04015585
delta = 0.76994521
out[, fitted := A * delta^(lag - 1)]

ggplot(out, aes(x = lag)) + geom_point(aes(y = coef_estimate), cex = 2) + geom_line(aes(y = fitted), col = 3, lwd = 1) + 
  theme_classic() + labs(x = 'Months', y = 'Price response') + coord_cartesian(ylim = c(-.01, .04)) + 
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + geom_hline(yintercept = 0, lty = 3)


# === Figrue 5(d): Price pressure and reversion
rm(list = ls())

# stock returns
data = readRDS('input_data/monthly_stock_return.RDS')

# get subsequent 36 months of returns
yyyymms = sort(unique(data[, yyyymm]))
n = length(yyyymms)
for (i in 1:36){
  print(i)
  tmp = data.table(yyyymm_prev = yyyymms[1:(n-i)], yyyymm = yyyymms[(i+1):n])
  data = merge(data, tmp, by = 'yyyymm', all.x = T)
  data = merge(data, data[, list(yyyymm = yyyymm_prev, permno, xx = ret)], by = c('yyyymm','permno'), all.x = T)
  setnames(data, 'xx', paste0('ret',i))
  data[, yyyymm_prev := NULL]
}
data = data[yyyymm <= 201612]; gc()
data = data[!is.na(ret)]
setnames(data, 'ret', 'ret0')

# merge with exponential sum of ratings at the stock-level
tmp = readRDS('input_data/monthly_stock_ratings.RDS')[!is.na(expSum), list(yyyymm, permno, expSum)]
tmp = tmp[yyyymm >= 199101]
data = merge(data, tmp, by = c('yyyymm','permno'), all.x = T); rm(tmp); gc()
data = data[!is.na(expSum)]

# sort stocks into NYSE deciles using expSum in each month
data[, bin := 1]
for (i in 1:9){
  print(i)
  data = merge(data, data[primexch == 'N', list(cut = quantile(expSum, i/10)), yyyymm], by = 'yyyymm', all.x = T)
  data[expSum > cut, bin := i+1]
  data[, cut := NULL]
}
rm(i, n, yyyymms)

# summarize returns after sorting
out = data.table()
for (i in 0:36){
  print(i)
  setnames(data, paste0('ret',i), 'xx')
  out = rbind(out, data[, list(hor = i, ret = weighted.mean(xx, me_1, na.rm = T)), bin])
  setnames(data, 'xx', paste0('ret',i))
}
out = out[order(hor, bin)]
rm(i); gc()

# to visualize cross-sectional difference, demean by period
out = merge(out, out[, list(m = mean(ret)), hor])
out[, ret := ret - m]
out[, m := NULL]

# just keep top and bottom deciles. Compute cumulative log returns
out = out[bin %in% c(1,10)]
out[bin == 1, cumlogret := cumsum(log(1+ret))]
out[bin ==10, cumlogret := cumsum(log(1+ret))]

# get bootstrapped standard errors
tmp = readRDS('input_data/stock_by_fit_bootstrapped_standard_errors.RDS')
out = merge(out, tmp, by = c('hor','bin')); rm(tmp)

# plot
out[, lab := ifelse(bin == 1, 'Bottom decile', 'Top decile')]
ggplot(out, aes(x = hor, y = cumlogret, fill = lab)) + geom_line() + 
  geom_ribbon(aes(ymin = cumlogret - 1.96*se, ymax = cumlogret + 1.96*se), alpha = .2) + 
  theme_classic() + labs(x = 'Months', y = 'Cumulative log return') + coord_cartesian(ylim = c(-.15, .1)) + 
  theme(legend.title = element_blank(), legend.position = c(.8, .9)) + geom_hline(yintercept = 0, lty = 3) + 
  scale_y_continuous(labels = scales::percent_format(accuracy = 1))
